<?php
// Long-running CLI crawler: lift PHP's execution time limit.
set_time_limit(0);

// Database settings. $tbname is read by save() through `global`.
$dbhost = 'localhost';
$dbname = 'dianping';
$dbuser = '';
$dbpwd = '';
$tbname = 'review';

// Project database wrapper; connect before any crawl function runs.
include 'db.class.php';
$db = new db();
$db->connect($dbhost,$dbuser,$dbpwd,$dbname,1,'UTF8');

// One shared cURL handle, reused by curl() and save() through `global $ch`.
$cookie = '/tmp/cookiecfP1sd';
$ch = curl_init();
curl_setopt_array($ch,array(
    CURLOPT_COOKIEJAR => $cookie,          // cookie jar file for this handle
    CURLOPT_FOLLOWLOCATION => TRUE,        // follow HTTP redirects
    CURLOPT_RETURNTRANSFER => TRUE,        // curl_exec() returns the body instead of printing it
    CURLOPT_USERAGENT => "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)",
));

// Clean up after an interrupted run first, then start crawling.
revise();
run();

/**
 * Prepare the crawl for a (re)start.
 *
 * Looks up one shop still flagged review = 'N'. If none is left, prints the
 * completion message and exits the script. Otherwise, if rows already exist
 * in `review` for that shop, they are deleted and the table's AUTO_INCREMENT
 * is reset so the shop is re-collected from scratch by run().
 *
 * Uses the global $db wrapper; returns nothing.
 */
function revise(){
    global $db;
    $shopN = $db->get_one("select * from review_shop where review = 'N' limit 1");

    // Nothing pending: report completion and stop. This check must come
    // before reading $shopN['id'], which is invalid on an empty result.
    if(empty($shopN)){
        echo date("Y-m-d H:i:s").' 采集完成'.PHP_EOL;
        exit;
    }

    $shopid = $shopN['id'];
    $reviewN = $db->get_one("select * from review where shopid = '$shopid' limit 1");
    if(empty($reviewN)){
        // No rows stored for the pending shop, so there is nothing to clean up.
        return;
    }

    // NOTE(review): the guard rejects id 1 as well as 0/empty; presumably
    // "> 0" was intended — confirm before changing.
    if($shopid > 1){
        $db->query("delete from review where shopid = '$shopid'");
        $db->query("ALTER TABLE review AUTO_INCREMENT = 1");
        echo 'start_shopid: '.$shopid.PHP_EOL;
    }else{
        echo '删除时ID有误，程序终止';
        exit;
    }
}

/**
 * Collect reviews for every shop still flagged review = 'N'.
 *
 * For each pending shop: extract the numeric shop id from its `shopurl`,
 * fetch the shop's review_more page through curl(), work out how many result
 * pages there are, and hand each page URL to save(). Failures are recorded in
 * the `logs` table. The shop is flagged review = 'Y' afterwards in every
 * case, so a failing shop is not retried. Prints the completion message and
 * returns once no pending shop is left.
 *
 * Uses the global $db wrapper; returns nothing.
 */
function run(){
    global $db;

    // Loop rather than recursing once per shop, so stack depth stays
    // constant no matter how many shops are pending.
    while(true){
        sleep(3);
        $shop = $db->get_one("select * from review_shop where review = 'N' limit 1");
        if(empty($shop)){
            echo date("Y-m-d H:i:s").' 采集完成'.PHP_EOL;
            return;
        }

        // Set when something goes wrong for this shop; written to `logs` below.
        $mark = '';
        $badurl = '';

        preg_match('/\/shop\/([0-9]+)/',$shop['shopurl'],$match_num);
        if(empty($match_num[1])){
            $mark = '商家地址错误';
            $badurl = $shop['shopurl'];
        }else{
            $reurl = 'http://www.dianping.com/shop/'.$match_num[1].'/review_more';
            // Sample shops kept from the original for manual testing:
            //$reurl = 'http://www.dianping.com/shop/2488519/review_more';  // 4 pages
            //$reurl = 'http://www.dianping.com/shop/3286516/review_more';  // 1 page
            //$reurl = 'http://www.dianping.com/shop/3972763/review_more';  // 0 pages
            $html_root = curl($reurl);
            if(empty($html_root)){
                $mark = '评论首页地址错误';
                $badurl = $reurl;
            }elseif(!preg_match('/pageno=/',$html_root)){
                // No pagination links: the landing page is the only page.
                save($reurl,$shop['id'],1);
            }else{
                // Paginated: the highest numeric page-link title is the page count.
                preg_match_all('/<a[^<]+class="PageLink".+title="(.+)".+<\/a>/U',$html_root,$match_pagelink);
                // Keep only all-digit titles. A non-numeric title would
                // otherwise win max() and break the loop bound below.
                $page_titles = empty($match_pagelink[1]) ? array() : preg_grep('/^[0-9]+$/',$match_pagelink[1]);
                if(empty($page_titles)){
                    $mark = '页码匹配错误';
                    $badurl = $reurl;
                }else{
                    $page_num = max(array_map('intval',$page_titles));
                    for($i=1;$i<=$page_num;$i++){
                        // Randomised pause between page requests.
                        $rad = rand(1,gaptime());
                        sleep($rad);
                        save($reurl.'?pageno='.$i,$shop['id'],$i);
                    }
                }
            }
        }

        if($mark !== ''){
            $logs = array('mark'=>$mark,'url'=>$badurl,'time'=>date("Y-m-d H:i:s"));
            $db->insert('logs',$logs);
        }
        $db->query("update review_shop set review = 'Y' where id = ".$shop['id']);
    }
}

/**
 * Fetch one review page and store every review summary found on it.
 *
 * The page is fetched through curl(), so a captcha interstitial halts the
 * script the same way it does for the landing page, instead of being
 * mistaken for a page with no reviews. Each summary matched by the regex
 * becomes one row in the table named by the global $tbname, written with a
 * single multi-row INSERT. Nothing is written when no summary matches.
 *
 * @param string $url     Review page URL to fetch.
 * @param mixed  $shopid  Shop id stored in the `shopid` column.
 * @param mixed  $pageno  Page number stored in the `pageno` column.
 */
function save($url,$shopid,$pageno){
    global $db;
    global $tbname;
    $html = curl($url);
    preg_match_all('/id="review_[0-9]+_summary">(.+)<\/div>/U',$html,$match);
    if(!empty($match[1])){
        $arr = array();
        foreach($match[1] as $val){
            // NOTE(review): mysql_real_escape_string() belongs to the legacy
            // mysql extension (removed in PHP 7); presumably db.class.php uses
            // the same extension — confirm before upgrading PHP.
            $arr[] = "('".$shopid."','".$pageno."','".mysql_real_escape_string($val)."')";
        }
        $str = implode(',',$arr);
        $sql = 'insert into '.$tbname.' (shopid,pageno,comment)  values '.$str;
        //echo PHP_EOL.$sql.PHP_EOL;
        $db->query($sql);
    }
}

/**
 * Fetch $url with the shared global cURL handle and return the response
 * body with all "\n" characters removed.
 *
 * If the body contains the site's captcha prompt text, the URL, a timestamp
 * and the full body are printed and the script exits.
 *
 * @param string $url Address to request.
 * @return string Response body without newline characters.
 */
function curl($url){
    global $ch;
    curl_setopt($ch,CURLOPT_URL,$url);
    $body = str_replace("\n","",curl_exec($ch));

    // Captcha wall reached: dump what was received and stop the crawl.
    if(preg_match('/请输入验证码后继续浏览/',$body)){
        echo '验证码 '.$url.' '.date("Y-m-d H:i:s").PHP_EOL, $body.PHP_EOL;
        exit;
    }

    return $body;
}

/**
 * Pick a random upper bound, in seconds, for the pause between page requests.
 *
 * A slot from 1 to 15 is drawn first: slot 5 yields a value in 31..32,
 * slot 8 a value in 10..15, and every other slot a value in 5..8.
 *
 * @return int Randomly chosen bound.
 */
function gaptime(){
    switch(rand(1,15)){
        case 5:
            $low = 31;
            $high = 32;
            break;
        case 8:
            $low = 10;
            $high = 15;
            break;
        default:
            $low = 5;
            $high = 8;
    }
    return rand($low,$high);
}

?>
